Code for manuscript.


Setting up the workspace and loading data

# Packages
# ==============================================================================
library(tidyr)
library(ggplot2)
library(knitr)
library(DT)
library(stringr)
library(xtable)
library(hrbrthemes)
library(dplyr)
library(forcats)
library(finalfit)
library(dplyr)
library(stringr)
library(janitor)
library(extrafont)
library(cowplot)
library(knitr)
library(parameters)
library(here)

# Load fonts
extrafont::loadfonts(quiet = TRUE)


# Load data
# ==============================================================================
bats_res = readRDS("data/Ethiopia_bat-info_test-summary.RDS")
bats_res$SiteName = trimws(gsub("-\\<Bat\\>|\\<Bat\\>", "", bats_res$SiteName))
bats_res$ConcurrentSamplingSite = bats_res$SiteName

tests = readRDS("data/Ethiopia_bat-test-info.RDS")
tests$SiteName = trimws(gsub("-\\<Bat\\>|\\<Bat\\>", "", tests$SiteName))
tests$Interface = ifelse(tests$ScientificName %in% "Rhinopoma hardwickii",
                      "Cave", "Building")

events = readRDS("data/Ethiopia_bat-sampling-events.RDS")

df = bats_res
df$Family = str_to_title(df$Family)
df$AgeClass = word(df$AgeClass, 1)

df$Interface = ifelse(df$ScientificName %in% "Rhinopoma hardwickii",
                      "Cave", "Building")

date_order = unique(format(as.Date(sort(df$EventDate), format = "%Y-%m-%d"),
                        "%Y-%B"))
df$EventMY = format(as.Date(df$EventDate, format = "%Y-%m-%d"), "%Y-%B")
df$EventMY = factor(df$EventMY, levels = date_order, ordered = TRUE)

tests = left_join(tests, df[ , c("AnimalID", "Family")], by = "AnimalID")



Sampling events

df %>%
    group_by(SiteName, EventDate, SeasonModelled) %>%
    tally(name = "No. of bats sampled") %>%
    group_by(EventDate) %>%
    arrange(SiteName, EventDate) %>%
    adorn_totals("row") %>%
    datatable(options = list(dom = 't'), rownames = FALSE)



Viral families tested

df %>%
    group_by(tests_done) %>%
    tally() %>%
    datatable(options = list(dom = 't'), rownames = FALSE)



Species of bats tested

df %>%
    select(Family, ScientificName) %>%
    group_by(Family, ScientificName) %>%
    tally() %>%
    adorn_totals("row") %>%
    datatable(options = list(dom = 't'), rownames = FALSE)



Number of bats testing positive

Number of bats tested = 589

Number of bats testing positive = 99

Percent bats testing positive = 16.8081494



Figure 1. Sampling sites

Made with ArcGIS 10.1

out_file = file.path(here(), "manuscript_figures")
myimages = list.files(out_file, pattern = glob2rx("Figure-1*png"),
                      full.names = TRUE)
include_graphics(myimages)



Table 1. Summary of bats sampled and viral testing results

df_ani =

    tests %>%
    select(SiteName, Interface, Family, ScientificName, AnimalID, VirusGroup) %>%
    group_by(SiteName, Interface, Family, ScientificName) %>%
    summarize(total_bats = length(unique(AnimalID))) %>%
    as.data.frame() %>%
    adorn_totals("row")



df_pos_ani =

    tests %>%
    select(SiteName, Family, ScientificName, AnimalID, VirusGroup) %>%
    filter( !is.na(VirusGroup)) %>%
    group_by(SiteName, Family, ScientificName) %>%
    summarize(pos_bats = length(unique(AnimalID))) %>%
    as.data.frame() %>%
    adorn_totals("row")


df_ani$pos_bats = paste0(
                      round(df_pos_ani$pos_bats/df_ani$total_bats * 100),
                      "% (", df_pos_ani$pos_bats, ")")
                      

df_spmn = 

    tests %>%
    select(SiteName, ScientificName, SpecimenType, SpecimenID) %>%
    mutate(SpecimenType = paste0("n_",
                                 gsub(" ", "_", SpecimenType))) %>%
    group_by(SiteName, ScientificName, SpecimenType) %>%
    summarize(total = length(unique(SpecimenID))) %>%
    as.data.frame() %>%
    spread(SpecimenType, total) %>%
    adorn_totals("row") %>%
    select(-SiteName)

df_pos_spmn =

    tests %>%
    select(SiteName, ScientificName, SpecimenType, SpecimenID, VirusGroup) %>%
    filter( !is.na(VirusGroup)) %>%
    mutate(SpecimenType = gsub(" ", "_", SpecimenType)) %>%
    group_by(SiteName, ScientificName, SpecimenType) %>%
    summarize(pos = length(unique(SpecimenID))) %>%
    as.data.frame() %>%
    spread(SpecimenType, pos) %>%
    mutate_all(~replace_na(., 0)) %>%
    adorn_totals("row") %>%
    select( -SiteName)

df_pos_spmn$oral_swab = paste0(
                            round(df_pos_spmn$oral_swab/df_spmn$n_oral_swab * 100),
                            "% (", df_pos_spmn$oral_swab, ")")

df_pos_spmn$rectal_swab = paste0(
                              round(df_pos_spmn$rectal_swab/df_spmn$n_rectal_swab * 100),
                              "% (", df_pos_spmn$rectal_swab, ")")


pos_viruses =

    df %>%
    select(ScientificName, pos_virus) %>%
    filter( !is.na(pos_virus)) %>%
    separate_rows(pos_virus, sep = "; ") %>%
    group_by(ScientificName, pos_virus) %>%
    tally() %>%
    mutate(viruses = paste0(pos_virus, " (", n, ")")) %>%
    group_by(ScientificName) %>%
    summarize(
        Viruses = paste0(sort(unique(viruses)), collapse = "<br/><br/>"))


bat_virus_info = left_join(df_ani, df_pos_spmn, by = "ScientificName")
bat_virus_info = left_join(bat_virus_info, pos_viruses, by = "ScientificName")

datatable(bat_virus_info, escape = FALSE, rownames = FALSE)



Figure 2. Summary by sampling month, season, bat species, and sampling interface/location


Figure subcomponent 1: positives by sampling month and season

p_df = df %>%
    group_by(EventMY, is_positive) %>%
    arrange(EventMY) %>%
    summarize(n = n()) %>%
    mutate(is_positive = ifelse(is_positive, "Virus Positive", "Virus Negative")) %>%
    mutate(is_positive = factor(is_positive, levels = c("Virus Positive", "Virus Negative")))

levels(p_df$EventMY) = gsub("-", "\n", levels(p_df$EventMY))

event_plot = ggplot(p_df, aes(x = EventMY, y = n, fill = is_positive, label = n)) +
    geom_bar(stat="identity", width = 0.5) +
    scale_fill_manual(values = c("#BD4F2C", "#2C42BD"), name = "Test result") +
    theme_ipsum(base_size = 12) +
    ylab("Number of bats") +
    xlab("") +
    theme(
        panel.grid.minor.x = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid = element_blank(),
        panel.border = element_blank(),
        legend.position = "top") +
    scale_y_continuous(expand = c(0,1))

p_season_df = df %>%
    group_by(EventMY, SeasonModelled) %>%
    arrange(EventMY) %>%
    summarize(n = n())

levels(p_season_df$EventMY) = gsub("-", "\n", levels(p_season_df$EventMY))

event_plot = event_plot + geom_text(data = p_season_df,
                       aes(label = SeasonModelled,
                           x = EventMY, y = n,
                           fill = NULL),
                       nudge_y = 2,
                       size = 3)


cowplot::save_plot("results/figs/event-plot.pdf", event_plot, base_height = 6)
cowplot::save_plot("results/figs/event-plot.png", event_plot, base_height = 6)

event_plot



Figure subcomponent 2: bat species tested

bat_plot = df %>%
    select(ScientificName, is_positive) %>%
    group_by(ScientificName, is_positive) %>%
    summarize(n = n()) %>%
    arrange(desc(n)) %>%
    as.data.frame %>%
    mutate(ScientificName = factor(ScientificName,
                                   levels = c(
                                                "Neoromicia cf. somalica",
                                                "Mops midas",
                                                "Chaerephon pumilus",
                                                "Rhinopoma hardwickii"
                                            ))) %>%
    ggplot( aes(x = ScientificName, y= -n,
                fill = as.factor(-is_positive), width = 0.75)) +
    geom_bar(stat="identity") +
    scale_fill_manual(values = c("#BD4F2C", "#2C42BD"), name = "") +
    scale_x_discrete(name = "", position = "top") +
    scale_y_continuous(name = "No. of bats",
                       breaks = seq(0, -300, by = -50),  # y axis values (before coord_flip) 
                       labels = seq(0,  300, by =  50)) +
    coord_flip() +
    theme_ipsum(base_size = 12, plot_title_size = 12) +
    theme(
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position="none") +
    xlab("") +
    ylab("Number of bats") +
    labs(title = "Species tested")

cowplot::save_plot("results/figs/bat-plot.pdf", bat_plot, base_height = 3)

bat_plot



Figure subcomponent 3: Pie charts for species-event timeline

sp_df = df %>%
    select(ScientificName, is_positive, EventDate) %>%
    group_by(ScientificName, is_positive, EventDate) %>%
    summarize(n = n()) %>%
    arrange(desc(n)) %>%
    mutate(sp_e = paste0(EventDate, "_", ScientificName)) %>%
    as.data.frame

sp_l = split(sp_df, sp_df$sp_e)

# ref:
# http://www.sthda.com/english/wiki/ggplot2-pie-chart-quick-start-guide-r-software-and-data-visualization

blank_theme = theme_minimal()+
    theme(
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_blank(),
        panel.border = element_blank(),
        panel.grid=element_blank(),
        axis.ticks = element_blank(),
        plot.title=element_text(size=14, face="bold"),
        legend.position = "none"
    )


out_dir = "results/figs/event_pie"
if( !dir.exists(out_dir)) dir.create(out_dir)

invisible(lapply(sp_l, function(sp, out_dir) {
    
    if(nrow(sp) > 1) {
        sp_p = sp %>%
            ggplot(aes(x = "", y = n, fill = as.factor(-is_positive))) +
            geom_bar(width = 1, stat="identity") +
            scale_fill_manual(values = c("#BD4F2C", "#2C42BD"), name = "") +
            coord_polar("y", start=0) +
            theme_minimal() +
            blank_theme

        
    } else {
        sp_p = sp %>%
            ggplot(aes(x = "", y = n, fill = "#B9B7B6")) +
            geom_bar(width = 1, stat="identity") +
            scale_fill_manual(values = "#2C42BD", name = "") +
            coord_polar("y", start=0) +
            theme_minimal() +
            blank_theme

    }

    out_name_pdf = file.path(out_dir, paste0(gsub(" ", "_", unique(sp$sp_e)), ".pdf"))
        out_name_png = file.path(out_dir, paste0(gsub(" ", "_", unique(sp$sp_e)), ".png"))
        cowplot::save_plot(out_name_pdf, sp_p, base_height = .5)
        cowplot::save_plot(out_name_png, sp_p, base_height = .5)
        
}, out_dir))
out_dir = file.path(here(), "results/figs/event_pie")
myimages = list.files(out_dir, pattern = ".png", full.names = TRUE)
include_graphics(myimages)

Final Figure 2 compiled with above subcomponents in Affinity Designer 1.8.3

out_file = file.path(here(), "manuscript_figures")
myimages = list.files(out_file, pattern = glob2rx("Figure-2*png"),
                      full.names = TRUE)
include_graphics(myimages)



Figure 3. Viral findings by bat species and sampling interface



Figure subcomponent 1: bar chart

df_p =

    df %>%
    mutate(SeasonModelled = factor(SeasonModelled),
           SeasonModelled = fct_rev(SeasonModelled),
           ScientificName = ifelse(ScientificName %in% "Rhinopoma hardwickii",
                                   paste0(ScientificName, " (Cave)"),
                                   paste0(ScientificName, " (Building)")))
    

virus_plot =

    df_p %>%
    select(pos_virus, pos_virus_n, pos_virus_family,
           ScientificName, SeasonModelled) %>%
    drop_na() %>%
    mutate(pos_virus = gsub("; ", "\nand ", pos_virus)) %>%
    group_by(pos_virus, pos_virus_n, pos_virus_family,
             ScientificName, SeasonModelled) %>%
    summarize(n = n()) %>%
    arrange(pos_virus_family, desc(n)) %>%
    as.data.frame %>%
    mutate(pos_virus = factor(pos_virus, unique(pos_virus)),
           pos_virus = fct_rev(pos_virus)) %>%
    ggplot(aes(x = reorder(pos_virus, n), y=n, fill = ScientificName, label = n) ) +
    facet_wrap( ~ SeasonModelled ) +
    scale_fill_manual(values = c("#CC6677", "#44AA99", "#DDCC77", "#AA4499"),
                      name = "Bat species and\nsampling interface") +
    geom_bar(stat="identity") +
    ylim(0, 80) +
    # geom_text( size = 3, nudge_y = 4) +
    coord_flip() +
    theme_ipsum(base_size = 10) +
    theme(
        axis.text.y = element_text(size = 8),
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position="right",
        legend.text = element_text(face = "italic")
    ) +
    xlab("") +
    ylab("Number of bats sampled") +
    labs(title = "Viruses detected in bats")


cowplot::save_plot("results/figs/virus-plot.pdf",
                   virus_plot, base_height = 6)

virus_plot



Figure subcomponent 2: Extra legend information

df %>%
group_by(ScientificName, SeasonModelled, is_positive) %>%
    tally() %>%
    mutate(is_positive = ifelse(is_positive, "Positive", "Negative")) %>%
    pivot_wider(names_from = is_positive, values_from = n) %>%
    adorn_totals("col") %>%
    mutate_all(~replace_na(., 0)) %>%
    mutate(pos_percent = round((Positive/Total * 100), 2)) %>%
    mutate(info = paste0(pos_percent, "% (", Positive, "/", Total, ")")) %>%
    select(ScientificName, SeasonModelled, info) %>%
    pivot_wider(names_from = SeasonModelled, values_from = info) %>%
    datatable(options = list(dom = 't'))



Final Figure 3 compiled with above subcomponents in Affinity Designer 1.8.3

out_file = file.path(here(), "manuscript_figures")
myimages = list.files(out_file, pattern = glob2rx("Figure-3*png"),
                      full.names = TRUE)
include_graphics(myimages)



Viruses detected

tbl = xtable(table(unlist(strsplit(df$pos_virus[ df$is_positive ], "; "))))
names(tbl) = "No. of bats testing positive for virus"
datatable(tbl)



Coinfections

df %>%
    filter( is_positive & pos_virus_n > 1 ) %>%
    select(SiteName, ScientificName, pos_virus) %>%
    group_by(SiteName, ScientificName, pos_virus) %>%
    summarize(n_bats = n()) %>%
    datatable(colnames = c("SiteName", "ScientificName", "Virus coinfection",
                           "No. of bats"))



Virus detection by specimen type

Breakdown of positive specimen types (at individual bat level)

Rectal swabs were the most common specimen type to yield a positive virus sequence (92/99 bats), including 27 bats from whom sequences were confirmed in both rectal and oral swabs. In addition, seven bats had viruses detected only via their oral swabs.

df %>%
    filter(is_positive) %>%
    group_by(pos_specimen_type) %>%
    tally() %>%
    adorn_totals("row") %>%
    datatable(options = list(dom = 't'))



Positive specimen types for bats with > 1 virus detected

ids = df$AnimalID[ which(df$pos_virus_n > 1) ]
multi_bats = unique(
                 tests[ tests$AnimalID %in% ids & !is.na(tests$VirusGroup),
                       c("AnimalID", "SpecimenType", "VirusGroup")]
             )


tbls = vector("list", 3)
for(i in seq_along(ids)) {
    tbls[[ i ]] = 
        multi_bats %>%
        filter(AnimalID %in% ids[ i ]) %>%
        datatable(options = list(dom = 't'))
}

tbls[[1]]
tbls[[2]]
tbls[[3]]



Virus group by specimen type (at individual bat level)

df %>%
    filter(is_positive) %>%
    select(pos_virus, pos_specimen_type) %>%
    group_by(pos_virus, pos_specimen_type) %>%
    tally() %>%
    as_data_frame() %>%
    spread(pos_specimen_type, n) %>%
    mutate_all(~replace_na(., 0)) %>%
    adorn_totals(c("row", "col")) %>%
    datatable(options = list(dom = 't'),
              colnames = c("Virus", "Oral swab only", "Oral and rectal swab",
                           "Rectal swab only", "Total"),
              rownames = FALSE)



Table 2. Viral prevalence in bats by sex, age class, season, species, and specimen type

Fisher's exact tests

# Sex, age class, season, species, and interface
# ------------------------------------------------------------------------------
dependent = "is_positive"
explanatory = c("Sex",
                "AgeClass",
                "SeasonModelled",
                "ScientificName",
                "Interface")

t1 = df %>%
    summary_factorlist(dependent, explanatory, 
                       p = TRUE,
                       p_cat = "fisher") # Fisher's Exact test 
                       

# Specimen type
# ------------------------------------------------------------------------------
df_spm = df[ , c("AnimalID", "is_positive", "pos_specimen_type")]
df_spm = rbind(df_spm, df_spm)
df_spm$specimen = rep(c("oral swab", "rectal swab"), each = nrow(df))

df_spm$specimen_pos = mapply(grepl, df_spm$specimen, df_spm$pos_specimen_type)

t2 = df_spm %>%
    summary_factorlist("specimen_pos", "specimen", 
                       p = TRUE,
                       p_cat = "fisher") # Fisher's Exact test 


# Output
# ------------------------------------------------------------------------------
ft = rbind(t1, t2)
names(ft) = c("Group", "Variable", "FALSE", "TRUE", "p")

ft$pos = as.numeric(gsub("\\s*\\([^\\)]+\\)", "", ft$`TRUE`))
ft$neg = as.numeric(gsub("\\s*\\([^\\)]+\\)", "", ft$`FALSE`))
ft$pos_percent = round((ft$pos/(ft$pos + ft$neg) * 100), 1)

ft_out = ft %>%
    mutate(pos_percent = paste0(pos_percent, "% (", pos, "/", pos + neg, ")")) %>%
    select(Group, Variable, pos_percent, p)

knitr::kable(ft_out, align = c("l", "l", "r", "r"), caption = "Table 2")
Table 2
Group Variable pos_percent p
Sex female 16.1% (66/409) 0.550
male 18.3% (33/180)
AgeClass adult 15.4% (82/533) 0.008
subadult 30.4% (17/56)
SeasonModelled Dry 5.1% (6/117) <0.001
Wet 19.7% (93/472)
ScientificName Chaerephon pumilus 6.1% (14/228) <0.001
Mops midas 6.7% (12/180)
Neoromicia cf. somalica 14.3% (1/7)
Rhinopoma hardwickii 41.4% (72/174)
Interface Building 6.5% (27/415) <0.001
Cave 41.4% (72/174)
specimen oral swab 5.8% (34/589) <0.001
rectal swab 15.6% (92/589)



Table 3. Association between bats testing positive for coronaviruses or paramyxoviruses and their species, sex, age, and season in which they were sampled

Multivariable logistic regression analyses


df_filter = df %>%
    filter(! ScientificName %in% c("Neoromicia cf. somalica"))

glm(is_positive ~ Sex + AgeClass + SeasonModelled + ScientificName,
    data = df_filter, family = binomial) %>%
    model_parameters(exponentiate = TRUE) %>%
    print_md()
Parameter Odds Ratio SE 95% CI z p
(Intercept) 0.02 0.01 (7.15e-03, 0.06) -6.86 < .001
Sex (male) 1.35 0.36 (0.80, 2.28) 1.13 0.259
AgeClass (subadult) 1.26 0.45 (0.62, 2.50) 0.64 0.522
SeasonModelled (Wet) 2.67 1.34 (1.09, 8.06) 1.96 0.050
ScientificName (Mops midas) 1.35 0.56 (0.59, 3.05) 0.71 0.475
ScientificName (Rhinopoma hardwickii) 10.39 3.34 (5.69, 20.23) 7.28 < .001



Other

Association between bats testing positive for coronaviruses or paramyxoviruses and their species, sex, age, and season in which they were sampled (Chaerephon pumilus and Rhinopoma hardwickii)

df_filter = df %>%
    filter(! ScientificName %in% c("Neoromicia cf. somalica", "Mops midas"))

glm(is_positive ~ Sex + AgeClass + SeasonModelled + ScientificName,
    data = df_filter, family = binomial) %>%
    model_parameters(exponentiate = TRUE) %>%
    print_md()
Parameter Odds Ratio SE 95% CI z p
(Intercept) 0.02 0.01 (2.30e-03, 0.06) -5.37 < .001
Sex (male) 1.31 0.38 (0.73, 2.33) 0.92 0.359
AgeClass (subadult) 1.23 0.44 (0.60, 2.45) 0.58 0.562
SeasonModelled (Wet) 4.33 3.30 (1.20, 27.83) 1.92 0.055
ScientificName (Rhinopoma hardwickii) 10.22 3.30 (5.59, 19.93) 7.21 < .001



Association between bats testing positive for coronaviruses or paramyxoviruses and their species, sex, and season in which they were sampled (Chaerephon pumilus, Rhinopoma hardwickii and Mops midas)

df_filter = df %>%
    filter(! ScientificName %in% c("Neoromicia cf. somalica"))

glm(is_positive ~ Sex + SeasonModelled + ScientificName,
    data = df_filter, family = binomial) %>%
    model_parameters(exponentiate = TRUE) %>%
    print_md()
Parameter Odds Ratio SE 95% CI z p
(Intercept) 0.02 0.01 (7.21e-03, 0.06) -6.85 < .001
Sex (male) 1.35 0.36 (0.80, 2.27) 1.13 0.260
SeasonModelled (Wet) 2.73 1.37 (1.11, 8.23) 2.01 0.044
ScientificName (Mops midas) 1.31 0.54 (0.57, 2.95) 0.66 0.512
ScientificName (Rhinopoma hardwickii) 10.53 3.38 (5.78, 20.49) 7.34 < .001